<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 5.4.0">
  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/images/logo.svg" color="#222">

<link rel="stylesheet" href="/css/main.css">

<link rel="stylesheet" href="//fonts.googleapis.com/css?family=Monda:300,300italic,400,400italic,700,700italic|Roboto Slab:300,300italic,400,400italic,700,700italic|PT Mono:300,300italic,400,400italic,700,700italic&display=swap&subset=latin,latin-ext">
<link rel="stylesheet" href="/lib/font-awesome/css/all.min.css">

<script id="hexo-configurations">
    var NexT = window.NexT || {};
    var CONFIG = {"hostname":"wangxl12.github.io","root":"/","scheme":"Gemini","version":"7.8.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12,"onmobile":true},"copycode":{"enable":true,"show_result":true,"style":"mac"},"back2top":{"enable":true,"sidebar":false,"scrollpercent":true},"bookmark":{"enable":true,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":"valine","storage":true,"lazyload":false,"nav":null,"activeClass":"valine"},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":true,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":true,"async":true,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},"path":"search.xml"};
  </script>

  <meta name="description" content="在学习《动手学习深度学习》教程的时候对于多项式拟合这一节有一些需要记录的内容. 原教程见这里：https:&#x2F;&#x2F;zh-v2.d2l.ai&#x2F;chapter_multilayer-perceptrons&#x2F;underfit-overfit.html">
<meta property="og:type" content="article">
<meta property="og:title" content="多项式拟合">
<meta property="og:url" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/index.html">
<meta property="og:site_name" content="WXL&#39;s blog">
<meta property="og:description" content="在学习《动手学习深度学习》教程的时候对于多项式拟合这一节有一些需要记录的内容. 原教程见这里：https:&#x2F;&#x2F;zh-v2.d2l.ai&#x2F;chapter_multilayer-perceptrons&#x2F;underfit-overfit.html">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E6%AD%A3%E5%B8%B8.png">
<meta property="og:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E6%AC%A0%E6%8B%9F%E5%90%88.png">
<meta property="og:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E8%BF%87%E6%8B%9F%E5%90%88.png">
<meta property="og:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E8%AE%AD%E7%BB%83%E6%8D%9F%E5%A4%B1.png">
<meta property="og:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E6%95%B0%E6%8D%AE%E9%87%8F.png">
<meta property="og:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E4%B8%8D%E6%A0%87%E5%87%86%E5%8C%96.png">
<meta property="og:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E4%B8%8D%E6%A0%87%E5%87%86%E5%8C%96%E5%A2%9E%E5%8A%A0%E5%A4%9A%E9%A1%B9%E5%BC%8F%E9%A1%B9%E6%95%B0.png">
<meta property="article:published_time" content="2021-11-22T11:22:23.000Z">
<meta property="article:modified_time" content="2021-11-23T03:14:18.937Z">
<meta property="article:author" content="WXL">
<meta property="article:tag" content="深度学习">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E6%AD%A3%E5%B8%B8.png">

<link rel="canonical" href="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/">


<script id="page-configurations">
  // https://hexo.io/docs/variables.html
  CONFIG.page = {
    sidebar: "",
    isHome : false,
    isPost : true,
    lang   : 'zh-CN'
  };
</script>

  <title>多项式拟合 | WXL's blog</title>
  






  <noscript>
  <style>
  .use-motion .brand,
  .use-motion .menu-item,
  .sidebar-inner,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header { opacity: initial; }

  .use-motion .site-title,
  .use-motion .site-subtitle {
    opacity: initial;
    top: initial;
  }

  .use-motion .logo-line-before i { left: initial; }
  .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>

<link rel="alternate" href="/atom.xml" title="WXL's blog" type="application/atom+xml">
<link href="https://cdn.bootcss.com/KaTeX/0.11.1/katex.min.css" rel="stylesheet" /></head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container use-motion">
    <div class="headband"></div>

    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="切换导航栏">
      <span class="toggle-line toggle-line-first"></span>
      <span class="toggle-line toggle-line-middle"></span>
      <span class="toggle-line toggle-line-last"></span>
    </div>
  </div>

  <div class="site-meta">

    <a href="/" class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <h1 class="site-title">WXL's blog</h1>
      <span class="logo-line-after"><i></i></span>
    </a>
      <p class="site-subtitle" itemprop="description">Talk is cheap, show me your work.</p>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger">
        <i class="fa fa-search fa-fw fa-lg"></i>
    </div>
  </div>
</div>




<nav class="site-nav">
  <ul id="menu" class="main-menu menu">
        <li class="menu-item menu-item-home">

    <a href="/" rel="section"><i class="fa fa-home fa-fw"></i>首页</a>

  </li>
        <li class="menu-item menu-item-about">

    <a href="/about/" rel="section"><i class="fa fa-user fa-fw"></i>关于</a>

  </li>
        <li class="menu-item menu-item-tags">

    <a href="/tags/" rel="section"><i class="fa fa-tags fa-fw"></i>标签<span class="badge">33</span></a>

  </li>
        <li class="menu-item menu-item-categories">

    <a href="/categories/" rel="section"><i class="fa fa-th fa-fw"></i>分类<span class="badge">18</span></a>

  </li>
        <li class="menu-item menu-item-archives">

    <a href="/archives/" rel="section"><i class="fa fa-archive fa-fw"></i>归档<span class="badge">49</span></a>

  </li>
      <li class="menu-item menu-item-search">
        <a role="button" class="popup-trigger"><i class="fa fa-search fa-fw"></i>搜索
        </a>
      </li>
  </ul>
</nav>



  <div class="search-pop-overlay">
    <div class="popup search-popup">
        <div class="search-header">
  <span class="search-icon">
    <i class="fa fa-search"></i>
  </span>
  <div class="search-input-container">
    <input autocomplete="off" autocapitalize="off"
           placeholder="搜索..." spellcheck="false"
           type="search" class="search-input">
  </div>
  <span class="popup-btn-close">
    <i class="fa fa-times-circle"></i>
  </span>
</div>
<div id="search-result">
  <div id="no-result">
    <i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>
  </div>
</div>

    </div>
  </div>

</div>
    </header>

    
  <div class="back-to-top">
    <i class="fa fa-arrow-up"></i>
    <span>0%</span>
  </div>
  <a role="button" class="book-mark-link book-mark-link-fixed"></a>

  <a href="https://github.com/wangxl12" class="github-corner" title="Follow me on GitHub" aria-label="Follow me on GitHub" rel="noopener" target="_blank"><svg width="80" height="80" viewBox="0 0 250 250" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a>


    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          

          <div class="content post posts-expand">
            

    
  
  
  <article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
    <link itemprop="mainEntityOfPage" href="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/images/td.jpg">
      <meta itemprop="name" content="WXL">
      <meta itemprop="description" content="">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="WXL's blog">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          多项式拟合
        </h1>

        <div class="post-meta">
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-calendar"></i>
              </span>
              <span class="post-meta-item-text">发表于</span>

              <time title="创建时间：2021-11-22 19:22:23" itemprop="dateCreated datePublished" datetime="2021-11-22T19:22:23+08:00">2021-11-22</time>
            </span>
              <span class="post-meta-item">
                <span class="post-meta-item-icon">
                  <i class="far fa-calendar-check"></i>
                </span>
                <span class="post-meta-item-text">更新于</span>
                <time title="修改时间：2021-11-23 11:14:18" itemprop="dateModified" datetime="2021-11-23T11:14:18+08:00">2021-11-23</time>
              </span>
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-folder"></i>
              </span>
              <span class="post-meta-item-text">分类于</span>
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/" itemprop="url" rel="index"><span itemprop="name">深度学习</span></a>
                </span>
            </span>

          
            <span id="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/" class="post-meta-item leancloud_visitors" data-flag-title="多项式拟合" title="阅读次数">
              <span class="post-meta-item-icon">
                <i class="fa fa-eye"></i>
              </span>
              <span class="post-meta-item-text">阅读次数：</span>
              <span class="leancloud-visitors-count"></span>
            </span>
  
  <span class="post-meta-item">
    
      <span class="post-meta-item-icon">
        <i class="far fa-comment"></i>
      </span>
      <span class="post-meta-item-text">Valine：</span>
    
    <a title="valine" href="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/#valine-comments" itemprop="discussionUrl">
      <span class="post-comments-count valine-comment-count" data-xid="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/" itemprop="commentCount"></span>
    </a>
  </span>
  
  

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">

      
        <p>在学习《动手学习深度学习》教程的时候对于多项式拟合这一节有一些需要记录的内容.</p>
<p>原教程见这里：<a target="_blank" rel="noopener" href="https://zh-v2.d2l.ai/chapter_multilayer-perceptrons/underfit-overfit.html">https://zh-v2.d2l.ai/chapter_multilayer-perceptrons/underfit-overfit.html</a></p>
<span id="more"></span>
<h1>数据的生成与处理</h1>
<p>给定一条数据<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>x</mi></mrow><annotation encoding="application/x-tex">x</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">x</span></span></span></span>，我们使用一下的三阶多项式来生成训练和测试数据的标签：</p>
<p class="katex-block"><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>y</mi><mo>=</mo><mn>5</mn><mo>+</mo><mn>1.2</mn><mi>x</mi><mo>−</mo><mn>3.4</mn><mfrac><msup><mi>x</mi><mn>2</mn></msup><mrow><mn>2</mn><mo stretchy="false">!</mo></mrow></mfrac><mo>+</mo><mn>5.6</mn><mfrac><msup><mi>x</mi><mn>3</mn></msup><mrow><mn>3</mn><mo stretchy="false">!</mo></mrow></mfrac><mo>+</mo><mi>ϵ</mi><mtext> where </mtext><mi>ϵ</mi><mo>∼</mo><mi mathvariant="script">N</mi><mrow><mo fence="true">(</mo><mn>0</mn><mo separator="true">,</mo><mn>0.</mn><msup><mn>1</mn><mn>2</mn></msup><mo fence="true">)</mo></mrow></mrow><annotation encoding="application/x-tex">y=5+1.2 x-3.4 \frac{x^{2}}{2 !}+5.6 \frac{x^{3}}{3 !}+\epsilon \text { where } \epsilon \sim \mathcal{N}\left(0,0.1^{2}\right)
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.625em;vertical-align:-0.19444em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">y</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:0.72777em;vertical-align:-0.08333em;"></span><span class="mord">5</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:0.72777em;vertical-align:-0.08333em;"></span><span class="mord">1</span><span class="mord">.</span><span class="mord">2</span><span class="mord mathdefault">x</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">−</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:2.177108em;vertical-align:-0.686em;"></span><span class="mord">3</span><span class="mord">.</span><span class="mord">4</span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.491108em;"><span style="top:-2.314em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord">2</span><span class="mclose">!</span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.677em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathdefault">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8141079999999999em;"><span style="top:-3.063em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span></span></span></span></span></span></span></span></span><span class="vlist-s">​</span></span><span class="vlist-r"><span class="vlist" style="height:0.686em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:2.177108em;vertical-align:-0.686em;"></span><span class="mord">5</span><span class="mord">.</span><span class="mord">6</span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.491108em;"><span style="top:-2.314em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord">3</span><span class="mclose">!</span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.677em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathdefault">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8141079999999999em;"><span style="top:-3.063em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight">3</span></span></span></span></span></span></span></span></span></span></span></span><span class="vlist-s">​</span></span><span class="vlist-r"><span class="vlist" style="height:0.686em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">+</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:0.69444em;vertical-align:0em;"></span><span class="mord mathdefault">ϵ</span><span class="mord text"><span class="mord"> where </span></span><span class="mord mathdefault">ϵ</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">∼</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1.2141179999999998em;vertical-align:-0.35001em;"></span><span class="mord"><span class="mord mathcal" style="margin-right:0.14736em;">N</span></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="minner"><span class="mopen delimcenter" style="top:0em;"><span class="delimsizing size1">(</span></span><span class="mord">0</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord">0</span><span class="mord">.</span><span class="mord"><span class="mord">1</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8641079999999999em;"><span style="top:-3.113em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight">2</span></span></span></span></span></span></span></span></span><span class="mclose delimcenter" style="top:0em;"><span class="delimsizing size1">)</span></span></span></span></span></span></span></p>
<p>噪声项<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ϵ</mi></mrow><annotation encoding="application/x-tex">\epsilon</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">ϵ</span></span></span></span>服从均值为0且标准差为0.1的正态分布，在优化的过程中，我们通常希望避免非常大的梯度值或损失值。这就是将特征从<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msup><mi>x</mi><mi>i</mi></msup></mrow><annotation encoding="application/x-tex">x^i</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.824664em;vertical-align:0em;"></span><span class="mord"><span class="mord mathdefault">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.824664em;"><span style="top:-3.063em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">i</span></span></span></span></span></span></span></span></span></span></span>调整为<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mfrac><msup><mi>x</mi><mi>i</mi></msup><mrow><mi>i</mi><mo stretchy="false">!</mo></mrow></mfrac></mrow><annotation encoding="application/x-tex">\frac{x^i}{i!}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1.3704599999999998em;vertical-align:-0.345em;"></span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.0254599999999998em;"><span style="top:-2.6550000000000002em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathdefault mtight">i</span><span class="mclose mtight">!</span></span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.394em;"><span class="pstrut" style="height:3em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight"><span class="mord mathdefault mtight">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.9020857142857143em;"><span style="top:-2.931em;margin-right:0.07142857142857144em;"><span class="pstrut" style="height:2.5em;"></span><span class="sizing reset-size3 size1 mtight"><span class="mord mathdefault mtight">i</span></span></span></span></span></span></span></span></span></span></span></span><span class="vlist-s">​</span></span><span class="vlist-r"><span class="vlist" style="height:0.345em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span></span></span></span>，这样可以避免很大的<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>i</mi></mrow><annotation encoding="application/x-tex">i</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.65952em;vertical-align:0em;"></span><span class="mord mathdefault">i</span></span></span></span>带来的特别大的指数值。我们将为训练集和测试集各生成100个样本。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">max_degree = <span class="number">20</span>  <span class="comment"># 多项式的最大阶数</span></span><br><span class="line">n_train, n_test = <span class="number">100</span>, <span class="number">100</span>  <span class="comment"># 训练集和测试集的大小</span></span><br><span class="line">true_w = np.zeros(max_degree)  <span class="comment"># 分配大量空间</span></span><br><span class="line">true_w[<span class="number">0</span>: <span class="number">4</span>] = np.array([<span class="number">5</span>, <span class="number">1.2</span>, -<span class="number">3.4</span>, <span class="number">5.6</span>])  <span class="comment"># 符合上述多项式分布的x特征的各个项的系数</span></span><br></pre></td></tr></table></figure>
<p>因为我们后面需要实现的效果是，通过修改传入的数值来改变多项式的项数，但是最大我们限制为20项。</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">features = np.random.normal(size=(n_train + n_test, <span class="number">1</span>))</span><br><span class="line">np.random.shuffle(features)</span><br><span class="line"><span class="comment"># print(features.shape)  # shape(200, 1)</span></span><br><span class="line">poly_features = np.power(features, np.arange(max_degree).reshape(<span class="number">1</span>, -<span class="number">1</span>))  <span class="comment"># features的所有行都执行一遍np.power(x[0: 20], np.arange(20))</span></span><br><span class="line"><span class="comment"># print(poly_features.shape)  # shape(200, 20)</span></span><br></pre></td></tr></table></figure>
<p>我们随机生成一个维度为<code>(200, 1)</code>的矩阵<code>features</code>，对于这个矩阵中的每一个元素都可以作为一个输入的<code>x</code>来代入上述三阶多项式获得最终的数据标签，所以接下来我们需要构造上述关于x的三阶多项式。</p>
<p>输入<code>np.power</code>函数里的两个参数的形状分别为<code>(200, 1)</code>和<code>(20, 1)</code>，最终得到的<code>poly_features</code>形状为<code>(200, 20)</code>，是如何操作的呢？我们举个例子：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">fea = np.array([[<span class="number">2</span>], [<span class="number">3</span>], [<span class="number">5</span>], [<span class="number">9</span>]])</span><br><span class="line">t = np.arange(<span class="number">3</span>).reshape(<span class="number">1</span>, -<span class="number">1</span>)  <span class="comment"># [[0],[1],[2]]</span></span><br><span class="line">answer = np.power(fea, t)</span><br><span class="line"><span class="built_in">print</span>(answer)</span><br></pre></td></tr></table></figure>
<p>也就是将每一个在<code>fea</code>里的元素都经过0次方、1次方、2次方，然后输出，得到的结果为：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">array([[ <span class="number">1</span>,  <span class="number">2</span>,  <span class="number">4</span>],</span><br><span class="line">       [ <span class="number">1</span>,  <span class="number">3</span>,  <span class="number">9</span>],</span><br><span class="line">       [ <span class="number">1</span>,  <span class="number">5</span>, <span class="number">25</span>],</span><br><span class="line">       [ <span class="number">1</span>,  <span class="number">9</span>, <span class="number">81</span>]], dtype=int32)</span><br></pre></td></tr></table></figure>
<p>这样我们就容易理解为什么<code>poly_features</code>最终的形状会变成<code>(200, 20)</code>，其中的200指的是200个输入的<code>x</code>，20指的是对每一个<code>x</code>都经过<code>0,1,2,...,19</code>次幂计算的结果，所以每一列都是对不同的<code>x</code>进行同一种幂指数的计算，然后我们对每一列除以相同的阶乘：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> <span class="built_in">range</span>(max_degree):</span><br><span class="line">    poly_features[:, i] /= math.gamma(i + <span class="number">1</span>)  <span class="comment"># gamma(n) = (n - 1)!: Γ(n)=(n−1)!</span></span><br></pre></td></tr></table></figure>
<p>最后我们还需要将权重（系数）和<code>x</code>相乘：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># labels的维度：(n_train + n_test,)</span></span><br><span class="line">labels = np.dot(poly_features, true_w)  <span class="comment"># labels.shape = (200, )</span></span><br></pre></td></tr></table></figure>
<p>其中，<code>poly_features.shape=(200, 20)</code>，<code>true_w.shape=(20, )</code>。</p>
<h1>训练、评估：</h1>
<p>教程中剩下来的内容好理解，就是常规的训练、评估的过程，如下：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">train</span>(<span class="params">train_features, test_features, train_labels, test_labels, num_epochs=<span class="number">400</span></span>):</span></span><br><span class="line">    loss = nn.MSELoss()</span><br><span class="line">    input_shape = train_features.shape[-<span class="number">1</span>]</span><br><span class="line">    </span><br><span class="line">    net = nn.Sequential(</span><br><span class="line">        nn.Linear(input_shape, <span class="number">1</span>, bias=<span class="literal">False</span>)</span><br><span class="line">    )</span><br><span class="line"></span><br><span class="line">    batch_size = <span class="built_in">min</span>(<span class="number">10</span>, train_labels.shape[<span class="number">0</span>])</span><br><span class="line">    train_iter = d2l.load_array((train_features, train_labels.reshape(-<span class="number">1</span>, <span class="number">1</span>)), batch_size)</span><br><span class="line">    test_iter = d2l.load_array((test_features, test_labels.reshape(-<span class="number">1</span>, <span class="number">1</span>)), batch_size)</span><br><span class="line">    </span><br><span class="line">    trainer = torch.optim.SGD(net.parameters(), lr=<span class="number">0.01</span>)</span><br><span class="line">    animator = d2l.Animator(xlabel=<span class="string">&#x27;epoch&#x27;</span>, ylabel=<span class="string">&#x27;loss&#x27;</span>, yscale=<span class="string">&#x27;log&#x27;</span>,</span><br><span class="line">                            xlim=[<span class="number">1</span>, num_epochs], ylim=[<span class="number">1e-3</span>, <span class="number">1e2</span>],</span><br><span class="line">                            legend=[<span class="string">&#x27;train&#x27;</span>, <span class="string">&#x27;test&#x27;</span>])</span><br><span class="line">    <span class="keyword">for</span> epoch <span class="keyword">in</span> <span class="built_in">range</span>(num_epochs):</span><br><span class="line">        d2l.train_epoch_ch3(net, train_iter, loss, trainer)</span><br><span class="line">        <span class="keyword">if</span> epoch == <span class="number">0</span> <span class="keyword">or</span> (epoch + <span class="number">1</span>) % <span class="number">20</span> == <span class="number">0</span>:</span><br><span class="line">            animator.add(epoch + <span class="number">1</span>, (d2l.evaluate_loss(net, train_iter, loss),</span><br><span class="line">                                     d2l.evaluate_loss(net, test_iter, loss)))</span><br><span class="line">    <span class="built_in">print</span>(<span class="string">&#x27;weight:&#x27;</span>, net[<span class="number">0</span>].weight.data.numpy())</span><br></pre></td></tr></table></figure>
<h1>拟合、欠拟合、过拟合：</h1>
<p><strong>欠拟合是指模型无法继续减少训练误差。过拟合是指训练误差远小于验证误差。</strong></p>
<p>需要注意的是，教程通过简单的修改输入数据的特征维数就可以实现不同项数的多项式拟合，为什么？</p>
<p>比如教程中通过如下代码实现较为接近的拟合结果：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">train(poly_features[: n_train, : <span class="number">4</span>], poly_features[n_train: , : <span class="number">4</span>], labels[: n_train], labels[n_train: ])</span><br></pre></td></tr></table></figure>
<p>输出：<code>weight: [[ 4.9948716  1.1728584 -3.4092236  5.648857 ]]</code></p>
<p>拟合结果如下：</p>
<p><img src="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E6%AD%A3%E5%B8%B8.png" alt></p>
<p>通过修改4-&gt;2为欠拟合：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 从多项式特征中选择前2个维度，即 1, x</span></span><br><span class="line">train(poly_features[:n_train, :<span class="number">2</span>], poly_features[n_train:, :<span class="number">2</span>],</span><br><span class="line">      labels[:n_train], labels[n_train:])</span><br></pre></td></tr></table></figure>
<p>输出：<code>weight: [[3.4502087 3.227234 ]]</code></p>
<p>拟合结果如下：</p>
<p><img src="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E6%AC%A0%E6%8B%9F%E5%90%88.png" alt></p>
<p>将所有的特征输入为过拟合：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 从多项式特征中选取所有维度</span></span><br><span class="line">train(poly_features[:n_train, :], poly_features[n_train:, :],</span><br><span class="line">      labels[:n_train], labels[n_train:], num_epochs=<span class="number">1500</span>)</span><br></pre></td></tr></table></figure>
<p>输出：<code>weight: [[ 4.9890423   1.2504098  -3.3738236   5.1632943  -0.15780173  1.4773566   0.35267243  0.36623996  0.11147966  0.18180016 -0.12584884 -0.19590111   0.18171196  0.10465395 -0.00926672  0.1231349   0.06259698 -0.18644147  -0.06582949 -0.05257604]]</code></p>
<p>拟合结果如下：</p>
<p><img src="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E8%BF%87%E6%8B%9F%E5%90%88.png" alt></p>
<p>首先，项数越多，就越容易过拟合，这个容易理解，为什么呢？因为项数多，每一项前面的系数都作为参数需要学习，参数量大，<strong>在这种情况下，没有足够的数据用于学到高阶系数应该具有接近于零的值。因此，这个过于复杂的模型会轻易受到训练数据中噪声的影响。虽然训练损失可以有效地降低，但测试损失仍然很高。结果表明，复杂模型对数据造成了过拟合。</strong></p>
<p>程序会根据输入特征的维数来构建神经网络，假设输入的特征维度为：<code>(200, n)</code>，其中200表示样本的数量，在本案例中表示不同的<code>x</code>的值的数量，<code>n</code>表示关于<code>x</code>的多项式的项数，然后根据这个维度来构建全连接层，这个全连接层的形状为<code>(n, 1)</code>，参数的数量为<code>n * 1 = n</code>，这就解释了为什么输入的特征维度就决定了参数数量，也就是多项式的项数。<strong>所以从中我们也受到启发，即在输入数据的特征过多的时候，容易造成过拟合，所以限制特征的数量是缓解过拟合的一种常用技术。</strong></p>
<p>我们还可以发现，深度学习是一个全局调试所有的参数，从而使得所有的参数共同作用时可以满足大部分任务的过程。我们可以设想输入的是一张图片，为了好理解，假设都是全连接层，那么对于图像每一个像素都添加一个权重参数，然后全局调整这些参数的值从而使得网络能最大限度的精准分类图像的结果。</p>
<h1>绘制训练损失和模型复杂度（多项式的阶数）的关系图</h1>
<p>我们每一次设定不同的多项式阶数（输入X的特征维度）都会经过<code>num_epoch</code>轮迭代，每一轮中又会经过<code>N/batch_size</code>次迭代，所以一共经过<code>num_epoch*N/batch_size</code>次迭代之后，我们就认为当前设定的多项式阶数训练结束了，就需要将我们的训练损失以及当前的阶数绘画出来。基于这个思路我们修改代码如下：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">train</span>(<span class="params">train_features, test_features, train_labels, test_labels, num_epochs=<span class="number">400</span></span>):</span></span><br><span class="line">    loss = nn.MSELoss()</span><br><span class="line">    input_shape = train_features.shape[-<span class="number">1</span>]</span><br><span class="line">    </span><br><span class="line">    net = nn.Sequential(</span><br><span class="line">        nn.Linear(input_shape, <span class="number">1</span>, bias=<span class="literal">False</span>)</span><br><span class="line">    )</span><br><span class="line"></span><br><span class="line">    batch_size = <span class="built_in">min</span>(<span class="number">10</span>, train_labels.shape[<span class="number">0</span>])</span><br><span class="line">    train_iter = d2l.load_array((train_features, train_labels.reshape(-<span class="number">1</span>, <span class="number">1</span>)), batch_size)</span><br><span class="line"></span><br><span class="line">    trainer = torch.optim.SGD(net.parameters(), lr=<span class="number">0.01</span>)</span><br><span class="line">        </span><br><span class="line">    <span class="keyword">for</span> _ <span class="keyword">in</span> <span class="built_in">range</span>(num_epochs):</span><br><span class="line">        train_loss, train_acc = d2l.train_epoch_ch3(net, train_iter, loss, trainer)</span><br><span class="line">    <span class="keyword">return</span> train_loss, train_acc</span><br></pre></td></tr></table></figure>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">animator = d2l.Animator(xlabel=<span class="string">&#x27;degree&#x27;</span>, ylabel=<span class="string">&#x27;loss&#x27;</span>, yscale=<span class="string">&#x27;log&#x27;</span>,</span><br><span class="line">                            xlim=[<span class="number">1</span>, <span class="number">20</span>], ylim=[<span class="number">1e-3</span>, <span class="number">1e2</span>],</span><br><span class="line">                            legend=[<span class="string">&#x27;train_loss&#x27;</span>])</span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> <span class="built_in">range</span>(<span class="number">2</span>, <span class="number">21</span>):</span><br><span class="line">    train_loss, train_acc = train(poly_features[: n_train, : i], poly_features[n_train: , : i], labels[: n_train], labels[n_train: ])</span><br><span class="line">    animator.add(i - <span class="number">1</span>, train_loss)</span><br></pre></td></tr></table></figure>
<p>输出图像如下：</p>
<p><img src="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E8%AE%AD%E7%BB%83%E6%8D%9F%E5%A4%B1.png" alt></p>
<p>从训练损失中可以看到，当阶数在3左右的时候训练损失是最小的，之后随着阶数的增加，训练损失不会下降反而微量上升。</p>
<h1>绘制训练损失和数据量的关系图</h1>
<p>从训练损失和阶数的关系图来看，当阶数为3的时候训练损失是最小的，我们就固定模型的阶数为3，然后成倍的增加数据量看看结果如何，同样，我们修改代码如下：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">train</span>(<span class="params">train_features, train_labels, num_epochs=<span class="number">400</span></span>):</span></span><br><span class="line">    loss = nn.MSELoss()</span><br><span class="line">    input_shape = train_features.shape[-<span class="number">1</span>]</span><br><span class="line">    </span><br><span class="line">    net = nn.Sequential(</span><br><span class="line">        nn.Linear(input_shape, <span class="number">1</span>, bias=<span class="literal">False</span>)</span><br><span class="line">    )</span><br><span class="line"></span><br><span class="line">    batch_size = <span class="built_in">min</span>(<span class="number">10</span>, train_labels.shape[<span class="number">0</span>])</span><br><span class="line">    train_iter = d2l.load_array((train_features, train_labels.reshape(-<span class="number">1</span>, <span class="number">1</span>)), batch_size)</span><br><span class="line">    </span><br><span class="line">    trainer = torch.optim.SGD(net.parameters(), lr=<span class="number">0.01</span>)</span><br><span class="line">    </span><br><span class="line">    <span class="keyword">for</span> _ <span class="keyword">in</span> <span class="built_in">range</span>(num_epochs):</span><br><span class="line">        train_loss, train_acc = d2l.train_epoch_ch3(net, train_iter, loss, trainer)</span><br><span class="line">    <span class="keyword">return</span> train_loss, train_acc</span><br><span class="line">            </span><br><span class="line">    <span class="comment"># print(&#x27;weight:&#x27;, net[0].weight.data.numpy())</span></span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">def</span> <span class="title">generate_features</span>(<span class="params">n</span>):</span></span><br><span class="line">    max_degree = <span class="number">20</span>  <span class="comment"># 多项式的最大阶数</span></span><br><span class="line"></span><br><span class="line">    true_w = np.zeros(max_degree)  <span class="comment"># 分配大量空间</span></span><br><span class="line">    true_w[<span class="number">0</span>: <span class="number">4</span>] = np.array([<span class="number">5</span>, <span class="number">1.2</span>, -<span class="number">3.4</span>, <span class="number">5.6</span>])  <span class="comment"># 符合上述多项式分布的x特征的各个项的系数</span></span><br><span class="line"></span><br><span class="line">    features = np.random.normal(size=(<span class="number">200</span> * n, <span class="number">1</span>))</span><br><span class="line">    np.random.shuffle(features)</span><br><span class="line"></span><br><span class="line">    poly_features = np.power(features, np.arange(max_degree).reshape(<span class="number">1</span>, -<span class="number">1</span>))  <span class="comment"># features的所有行都执行一遍np.power(x[0: 20], np.arange(20))</span></span><br><span class="line"></span><br><span class="line"></span><br><span class="line">    <span class="keyword">for</span> i <span class="keyword">in</span> <span class="built_in">range</span>(max_degree):</span><br><span class="line">        poly_features[:, i] /= math.gamma(i + <span class="number">1</span>)  <span class="comment"># gamma(n) = (n - 1)!: Γ(n)=(n−1)!</span></span><br><span class="line"></span><br><span class="line">    <span class="comment"># labels的维度：(n_train + n_test,)</span></span><br><span class="line">    labels = np.dot(poly_features, true_w)  <span class="comment"># labels.shape = (200, )</span></span><br><span class="line">    labels += np.random.normal(scale=<span class="number">0.1</span>, size=labels.shape)  <span class="comment"># 这个作为输入特征的偏置，不用在网络中定义偏置bias了</span></span><br><span class="line">    </span><br><span class="line">    <span class="keyword">return</span> poly_features, labels</span><br></pre></td></tr></table></figure>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><span class="line">n_train = <span class="number">200</span></span><br><span class="line">animator = d2l.Animator(xlabel=<span class="string">&#x27;data&#x27;</span>, ylabel=<span class="string">&#x27;loss&#x27;</span>, yscale=<span class="string">&#x27;log&#x27;</span>,</span><br><span class="line">                            xlim=[<span class="number">200</span>, <span class="number">2000</span>], ylim=[<span class="number">6e-3</span>, <span class="number">1e-1</span>],</span><br><span class="line">                            legend=[<span class="string">&#x27;train_loss&#x27;</span>])</span><br><span class="line">poly_features, labels = generate_features(<span class="number">10</span>)</span><br><span class="line">poly_features = torch.tensor(poly_features, dtype=torch.float32)</span><br><span class="line">labels = torch.tensor(labels, dtype=torch.float32)</span><br><span class="line"></span><br><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> <span class="built_in">range</span>(<span class="number">1</span>, <span class="number">11</span>):</span><br><span class="line">    train_loss, train_acc = train(poly_features[: n_train * i, : <span class="number">4</span>], labels[: n_train * i])</span><br><span class="line">    <span class="built_in">print</span>(train_loss)</span><br><span class="line">    animator.add(i * n_train, train_loss)</span><br></pre></td></tr></table></figure>
<p>得到的图像如下：</p>
<p><img src="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E6%95%B0%E6%8D%AE%E9%87%8F.png" alt></p>
<p>从图中观察到从数据量200往后越来越多之后，训练误差也在变高，可能是数据量变化的幅度太大，观察的结果不够明显。</p>
<h1>如果不对多项式特征<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msup><mi>x</mi><mi>i</mi></msup></mrow><annotation encoding="application/x-tex">x^i</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.824664em;vertical-align:0em;"></span><span class="mord"><span class="mord mathdefault">x</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.824664em;"><span style="top:-3.063em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">i</span></span></span></span></span></span></span></span></span></span></span>进行标准化(1/<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>i</mi><mo stretchy="false">!</mo></mrow><annotation encoding="application/x-tex">i!</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.69444em;vertical-align:0em;"></span><span class="mord mathdefault">i</span><span class="mclose">!</span></span></span></span>​)，会怎样？</h1>
<p>将这部分代码注释掉：</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">for</span> i <span class="keyword">in</span> <span class="built_in">range</span>(max_degree):</span><br><span class="line">    poly_features[:, i] /= math.gamma(i + <span class="number">1</span>)  <span class="comment"># gamma(n) = (n - 1)!: Γ(n)=(n−1)!</span></span><br></pre></td></tr></table></figure>
<p>进行训练得到如下的结果：</p>
<p><img src="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E4%B8%8D%E6%A0%87%E5%87%86%E5%8C%96.png" alt></p>
<p>按照正常情况应该会造成梯度或者损失过大的，但是这里没有出现那种情况。如果我不断的增加多项式的项数进行训练呢？结果如下：</p>
<p><img src="/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/%E4%B8%8D%E6%A0%87%E5%87%86%E5%8C%96%E5%A2%9E%E5%8A%A0%E5%A4%9A%E9%A1%B9%E5%BC%8F%E9%A1%B9%E6%95%B0.png" alt></p>
<p>之后没画出来是因为损失为<code>NaN</code>，一般是梯度爆炸造成的，解决梯度爆炸可以使用如下方式解决：</p>
<ol>
<li>对训练集的输出（label）使用分为标准化，抑制过大的输出；</li>
<li>为模型增加正则化；</li>
<li>减少模型的规模；</li>
<li>增大batch_size大小。</li>
</ol>
<p>Reference:</p>
<p><a target="_blank" rel="noopener" href="https://blog.csdn.net/xovee/article/details/92762035">https://blog.csdn.net/xovee/article/details/92762035</a></p>

    </div>

    
    
    
        <div class="reward-container">
  <div>行行好，赏一杯咖啡吧~</div>
  <button onclick="var qr = document.getElementById('qr'); qr.style.display = (qr.style.display === 'none') ? 'block' : 'none';">
    打赏
  </button>
  <div id="qr" style="display: none;">
      
      <div style="display: inline-block;">
        <img src="/images/wechatpay.jpg" alt="WXL 微信支付">
        <p>微信支付</p>
      </div>
      
      <div style="display: inline-block;">
        <img src="/images/alipay.jpg" alt="WXL 支付宝">
        <p>支付宝</p>
      </div>

  </div>
</div>

        

<div>
<ul class="post-copyright">
  <li class="post-copyright-author">
    <strong>本文作者： </strong>WXL
  </li>
  <li class="post-copyright-link">
    <strong>本文链接：</strong>
    <a href="https://wangxl12.github.io/2021/11/22/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E5%A4%9A%E9%A1%B9%E5%BC%8F%E6%8B%9F%E5%90%88/" title="多项式拟合">https://wangxl12.github.io/2021/11/22/深度学习/多项式拟合/</a>
  </li>
  <li class="post-copyright-license">
    <strong>版权声明： </strong>本博客所有文章除特别声明外，均采用 <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" rel="noopener" target="_blank"><i class="fab fa-fw fa-creative-commons"></i>BY-NC-SA</a> 许可协议。转载请注明出处！
  </li>
</ul>
</div>


      <footer class="post-footer">
          
          <div class="post-tags">
              <a href="/tags/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/" rel="tag"><i class="fa fa-tag"></i> 深度学习</a>
          </div>

        


        
    <div class="post-nav">
      <div class="post-nav-item">
    <a href="/2021/11/14/Git/%E5%B0%8F%E7%BB%84%E5%90%88%E4%BD%9C%E9%A1%B9%E7%9B%AE%E4%BD%BF%E7%94%A8Git%E7%9A%84%E6%AD%A3%E7%A1%AE%E5%A7%BF%E5%8A%BF/" rel="prev" title="小组合作项目使用Git的合理姿势">
      <i class="fa fa-chevron-left"></i> 小组合作项目使用Git的合理姿势
    </a></div>
      <div class="post-nav-item">
    <a href="/2021/11/23/%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0/%E6%9D%83%E9%87%8D%E8%A1%B0%E5%87%8F/" rel="next" title="权重衰减">
      权重衰减 <i class="fa fa-chevron-right"></i>
    </a></div>
    </div>
      </footer>
    
  </article>
  
  
  



          </div>
          
    <div class="comments" id="valine-comments"></div>

<script>
  window.addEventListener('tabs:register', () => {
    let { activeClass } = CONFIG.comments;
    if (CONFIG.comments.storage) {
      activeClass = localStorage.getItem('comments_active') || activeClass;
    }
    if (activeClass) {
      let activeTab = document.querySelector(`a[href="#comment-${activeClass}"]`);
      if (activeTab) {
        activeTab.click();
      }
    }
  });
  if (CONFIG.comments.storage) {
    window.addEventListener('tabs:click', event => {
      if (!event.target.matches('.tabs-comment .tab-content .tab-pane')) return;
      let commentClass = event.target.classList[1];
      localStorage.setItem('comments_active', commentClass);
    });
  }
</script>

        </div>
          
  
  <div class="toggle sidebar-toggle">
    <span class="toggle-line toggle-line-first"></span>
    <span class="toggle-line toggle-line-middle"></span>
    <span class="toggle-line toggle-line-last"></span>
  </div>

  <aside class="sidebar">
    <div class="sidebar-inner">

      <ul class="sidebar-nav motion-element">
        <li class="sidebar-nav-toc">
          文章目录
        </li>
        <li class="sidebar-nav-overview">
          站点概览
        </li>
      </ul>

      <!--noindex-->
      <div class="post-toc-wrap sidebar-panel">
          <div class="post-toc motion-element"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link"><span class="nav-number">1.</span> <span class="nav-text">数据的生成与处理</span></a></li><li class="nav-item nav-level-1"><a class="nav-link"><span class="nav-number">2.</span> <span class="nav-text">训练、评估：</span></a></li><li class="nav-item nav-level-1"><a class="nav-link"><span class="nav-number">3.</span> <span class="nav-text">拟合、欠拟合、过拟合：</span></a></li><li class="nav-item nav-level-1"><a class="nav-link"><span class="nav-number">4.</span> <span class="nav-text">绘制训练损失和模型复杂度（多项式的阶数）的关系图</span></a></li><li class="nav-item nav-level-1"><a class="nav-link"><span class="nav-number">5.</span> <span class="nav-text">绘制训练损失和数据量的关系图</span></a></li><li class="nav-item nav-level-1"><a class="nav-link"><span class="nav-number">6.</span> <span class="nav-text">如果不对多项式特征xix^ixi进行标准化(1&#x2F;i!i!i!​)，会怎样？</span></a></li></ol></div>
      </div>
      <!--/noindex-->

      <div class="site-overview-wrap sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
    <img class="site-author-image" itemprop="image" alt="WXL"
      src="/images/td.jpg">
  <p class="site-author-name" itemprop="name">WXL</p>
  <div class="site-description" itemprop="description"></div>
</div>
<div class="site-state-wrap motion-element">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
          <a href="/archives/">
        
          <span class="site-state-item-count">49</span>
          <span class="site-state-item-name">日志</span>
        </a>
      </div>
      <div class="site-state-item site-state-categories">
            <a href="/categories/">
          
        <span class="site-state-item-count">18</span>
        <span class="site-state-item-name">分类</span></a>
      </div>
      <div class="site-state-item site-state-tags">
            <a href="/tags/">
          
        <span class="site-state-item-count">33</span>
        <span class="site-state-item-name">标签</span></a>
      </div>
  </nav>
</div>
  <div class="links-of-author motion-element">
      <span class="links-of-author-item">
        <a href="https://github.com/wangxl12" title="GitHub → https:&#x2F;&#x2F;github.com&#x2F;wangxl12" rel="noopener" target="_blank"><i class="fab fa-github fa-fw"></i>GitHub</a>
      </span>
      <span class="links-of-author-item">
        <a href="mailto:wxl.1.2.3@qq.com" title="E-Mail → mailto:wxl.1.2.3@qq.com" rel="noopener" target="_blank"><i class="fa fa-envelope fa-fw"></i>E-Mail</a>
      </span>
  </div>
  <div class="cc-license motion-element" itemprop="license">
    <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" class="cc-opacity" rel="noopener" target="_blank"><img src="/images/cc-by-nc-sa.svg" alt="Creative Commons"></a>
  </div>


  <div class="links-of-blogroll motion-element">
    <div class="links-of-blogroll-title"><i class="fa fa-link fa-fw"></i>
      Links
    </div>
    <ul class="links-of-blogroll-list">
        <li class="links-of-blogroll-item">
          <a href="https://blog.csdn.net/weixin_43141320?spm=1000.2115.3001.5343" title="https:&#x2F;&#x2F;blog.csdn.net&#x2F;weixin_43141320?spm&#x3D;1000.2115.3001.5343" rel="noopener" target="_blank">My CSDN Blog</a>
        </li>
        <li class="links-of-blogroll-item">
          <a href="http://blog.kilig.ink/" title="http:&#x2F;&#x2F;blog.kilig.ink&#x2F;" rel="noopener" target="_blank">HuangPiSong</a>
        </li>
    </ul>
  </div>

      </div>

    </div>
  </aside>
  <div id="sidebar-dimmer"></div>


      </div>
    </main>

    <footer class="footer">
      <div class="footer-inner">
        

        

<div class="copyright">
  
  &copy; 
  <span itemprop="copyrightYear">2022</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">WXL</span>
</div>
  <div class="powered-by">由 <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> & <a href="https://theme-next.org/" class="theme-link" rel="noopener" target="_blank">NexT.Gemini</a> 强力驱动
  </div>

        








      </div>
    </footer>
  </div>

  
  <script src="/lib/anime.min.js"></script>
  <script src="/lib/velocity/velocity.min.js"></script>
  <script src="/lib/velocity/velocity.ui.min.js"></script>

<script src="/js/utils.js"></script>

<script src="/js/motion.js"></script>


<script src="/js/schemes/pisces.js"></script>


<script src="/js/next-boot.js"></script>

<script src="/js/bookmark.js"></script>


  <script defer src="/lib/three/three.min.js"></script>
    <script defer src="/lib/three/three-waves.min.js"></script>


  
  <script>
    (function(){
      var canonicalURL, curProtocol;
      //Get the <link> tag
      var x=document.getElementsByTagName("link");
		//Find the last canonical URL
		if(x.length > 0){
			for (i=0;i<x.length;i++){
				if(x[i].rel.toLowerCase() == 'canonical' && x[i].href){
					canonicalURL=x[i].href;
				}
			}
		}
    //Get protocol
	    if (!canonicalURL){
	    	curProtocol = window.location.protocol.split(':')[0];
	    }
	    else{
	    	curProtocol = canonicalURL.split(':')[0];
	    }
      //Get current URL if the canonical URL does not exist
	    if (!canonicalURL) canonicalURL = window.location.href;
	    //Assign script content. Replace current URL with the canonical URL
      !function(){var e=/([http|https]:\/\/[a-zA-Z0-9\_\.]+\.baidu\.com)/gi,r=canonicalURL,t=document.referrer;if(!e.test(r)){var n=(String(curProtocol).toLowerCase() === 'https')?"https://sp0.baidu.com/9_Q4simg2RQJ8t7jm9iCKT-xh_/s.gif":"//api.share.baidu.com/s.gif";t?(n+="?r="+encodeURIComponent(document.referrer),r&&(n+="&l="+r)):r&&(n+="?l="+r);var i=new Image;i.src=n}}(window);})();
  </script>




  
<script src="/js/local-search.js"></script>













  

  
      

<script>
  if (typeof MathJax === 'undefined') {
    window.MathJax = {
      loader: {
          load: ['[tex]/mhchem'],
        source: {
          '[tex]/amsCd': '[tex]/amscd',
          '[tex]/AMScd': '[tex]/amscd'
        }
      },
      tex: {
        inlineMath: {'[+]': [['$', '$']]},
          packages: {'[+]': ['mhchem']},
        tags: 'ams'
      },
      options: {
        renderActions: {
          findScript: [10, doc => {
            document.querySelectorAll('script[type^="math/tex"]').forEach(node => {
              const display = !!node.type.match(/; *mode=display/);
              const math = new doc.options.MathItem(node.textContent, doc.inputJax[0], display);
              const text = document.createTextNode('');
              node.parentNode.replaceChild(text, node);
              math.start = {node: text, delim: '', n: 0};
              math.end = {node: text, delim: '', n: 0};
              doc.math.push(math);
            });
          }, '', false],
          insertedScript: [200, () => {
            document.querySelectorAll('mjx-container').forEach(node => {
              let target = node.parentNode;
              if (target.nodeName.toLowerCase() === 'li') {
                target.parentNode.classList.add('has-jax');
              }
            });
          }, '', false]
        }
      }
    };
    (function () {
      var script = document.createElement('script');
      script.src = '//cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js';
      script.defer = true;
      document.head.appendChild(script);
    })();
  } else {
    MathJax.startup.document.state(0);
    MathJax.texReset();
    MathJax.typeset();
  }
</script>

    

  


<script>
NexT.utils.loadComments(document.querySelector('#valine-comments'), () => {
  NexT.utils.getScript('//unpkg.com/valine/dist/Valine.min.js', () => {
    var GUEST = ['nick', 'mail', 'link'];
    var guest = 'nick,mail,link';
    guest = guest.split(',').filter(item => {
      return GUEST.includes(item);
    });
    new Valine({
      el         : '#valine-comments',
      verify     : false,
      notify     : false,
      appId      : 's5o4gRGNyYbPVRfziI5EzhO1-gzGzoHsz',
      appKey     : 'N7aQtR2SU9AwgaO2YtVhRO0W',
      placeholder: "发表你的评论吧~",
      avatar     : 'mm',
      meta       : guest,
      pageSize   : '10' || 10,
      visitor    : true,
      lang       : 'zh-cn' || 'zh-cn',
      path       : location.pathname,
      recordIP   : true,
      serverURLs : ''
    });
  }, window.Valine);
});
</script>

</body>
</html>
